Data

Which years are missing a metric

dat %>% 
  ggplot(aes(year, lakeid, fill=is.na(daynum))) +
  geom_tile(color="grey") +
  theme_bw() +
  facet_wrap(~metric, nrow=6) 

  # labs(fill = "Missing") + 
  # scale_fill_scico(palette = 'imola')

# dat %>% 
#   ggplot(aes(year, lakeid, fill=daynum)) +
#   geom_tile(color="grey") +
#   theme_bw() +
#   facet_wrap(~metric, nrow=6) + 
#   # labs(fill = "Missing") + 
#   scale_fill_viridis_c()

Remaining data issues:

  • Northern lakes don’t have DOC until until 1986; missing first 4 years when all other variables are available. Potential fixes:
    • Exclude DOC; start analyses in 1986; fill DOC with e.g., median value, or model of other variables?
  • Two missing iceoff dates in AL (1982) and CB (1988). Potential fix:
    • Fill with function/model of other lake ice on dates
  • Southern lakes missing chlorophyll data: 1996 - 1999 and 2002 - 2004; 2002-04 is bad/uncalibrated values, used DOY but not magnitude; earlier years fill with median
  • Zoop data issues:
    • CB missing daphnia peak 1995(?): fill with medain
    • FI missing several years from 1996 - 2006; fill with medain

Predict ice-on in northern lakes from other lakes ice-on dates

n_iceoff_wide = dat %>% 
  select(-sampledate)%>% 
  filter(lakeid %in% c("AL", "BM", "CB", "CR", "SP", "TB", "TR") & metric == "iceoff") %>% 
  pivot_wider(names_from = lakeid, values_from = daynum)

al_iceoff_model = lm(AL~BM+CB+CR+SP+TB+TR, data=n_iceoff_wide)
summary(al_iceoff_model)
## 
## Call:
## lm(formula = AL ~ BM + CB + CR + SP + TB + TR, data = n_iceoff_wide)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.3721 -0.8451  0.1265  0.9507  2.7265 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -3.10967    3.27070  -0.951   0.3496  
## BM          -0.03295    0.18851  -0.175   0.8624  
## CB           0.24815    0.12648   1.962   0.0594 .
## CR           0.20333    0.17076   1.191   0.2434  
## SP           0.31430    0.20384   1.542   0.1339  
## TB           0.19500    0.10280   1.897   0.0678 .
## TR           0.09211    0.13278   0.694   0.4934  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.779 on 29 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.9793, Adjusted R-squared:  0.975 
## F-statistic: 228.9 on 6 and 29 DF,  p-value: < 2.2e-16
plot(n_iceoff_wide$AL, predict(al_iceoff_model, n_iceoff_wide), pch=16)

# actually looks good; use to fill
cb_iceoff_model = lm(CB~AL+BM+CR+SP+TB+TR, data=n_iceoff_wide)
summary(cb_iceoff_model)
## 
## Call:
## lm(formula = CB ~ AL + BM + CR + SP + TB + TR, data = n_iceoff_wide)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.3604 -0.8546  0.0970  1.5519  3.4296 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  3.02887    4.54691   0.666   0.5106  
## AL           0.47220    0.24068   1.962   0.0594 .
## BM           0.04037    0.26007   0.155   0.8777  
## CR           0.46368    0.22535   2.058   0.0487 *
## SP           0.04008    0.29239   0.137   0.8919  
## TB           0.08250    0.14956   0.552   0.5854  
## TR          -0.14222    0.18277  -0.778   0.4428  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.455 on 29 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.9584, Adjusted R-squared:  0.9498 
## F-statistic: 111.5 on 6 and 29 DF,  p-value: < 2.2e-16
plot(n_iceoff_wide$CB, predict(cb_iceoff_model, n_iceoff_wide), pch=16)

# really good; go ahead and fill

al_missing_iceoff = n_iceoff_wide %>% filter(is.na(AL))
al_missing_iceoff$iceoff_fill = round(predict(al_iceoff_model, al_missing_iceoff))
al_iceoff_fill = al_missing_iceoff %>% 
  select(metric, year, iceoff_fill) %>% 
  rename(daynum_fill = iceoff_fill) %>% 
  mutate(metric = "iceoff", lakeid = "AL") 

cb_missing_iceoff = n_iceoff_wide %>% filter(is.na(CB))
cb_missing_iceoff$iceoff_fill = round(predict(cb_iceoff_model, cb_missing_iceoff))
cb_iceoff_fill = cb_missing_iceoff %>% 
  select(metric, year, iceoff_fill) %>% 
  rename(daynum_fill = iceoff_fill) %>% 
  mutate(metric = "iceoff", lakeid = "CB")

iceoff_fill = bind_rows(al_iceoff_fill, cb_iceoff_fill)

Predict DOC daynum from other variable daynum’s in each northern lake

no_dups = dat %>% group_by(lakeid, year, metric) %>% summarise(N = n()) %>% filter(N == 1)
## `summarise()` has grouped output by 'lakeid', 'year'. You can override using
## the `.groups` argument.
n_lakes_wide = left_join(no_dups, dat) %>% 
  select(-sampledate) %>% 
  pivot_wider(names_from = metric, values_from = daynum) %>% 
  filter(lakeid %in% c("AL", "BM", "CB", "CR", "SP", "TB", "TR"))
## Joining, by = c("lakeid", "year", "metric")
hold_data = na.omit(data.frame(n_lakes_wide %>%  ungroup() %>% select(-year, -N))) # , -doc_predict
all = lm(doc_epiMax~(iceon+straton+stratoff+energy+
                 stability+anoxia_summer+secchi_all+
                 secchi_openwater+total_zoop_biomass+daphnia_biomass)*lakeid, data=hold_data)
i_o = lm(doc_epiMax~1, data=hold_data)
hold_step = step(i_o, scope=formula(all))
## Start:  AIC=1737.66
## doc_epiMax ~ 1
## 
##                      Df Sum of Sq    RSS    AIC
## + lakeid              6     78192 370019 1705.8
## + stratoff            1     21732 426479 1728.3
## + total_zoop_biomass  1      4122 444089 1737.5
## <none>                            448211 1737.7
## + energy              1      3897 444314 1737.7
## + secchi_openwater    1      2877 445334 1738.2
## + daphnia_biomass     1      2510 445700 1738.4
## + straton             1      2041 446170 1738.6
## + secchi_all          1      1318 446893 1739.0
## + anoxia_summer       1       406 447805 1739.5
## + iceon               1       299 447912 1739.5
## + stability           1        12 448199 1739.7
## 
## Step:  AIC=1705.76
## doc_epiMax ~ lakeid
## 
##                      Df Sum of Sq    RSS    AIC
## + stratoff            1      6571 363448 1703.7
## + anoxia_summer       1      4428 365591 1705.0
## <none>                            370019 1705.8
## + daphnia_biomass     1      2831 367188 1706.0
## + total_zoop_biomass  1       801 369218 1707.3
## + straton             1       432 369587 1707.5
## + secchi_all          1       324 369695 1707.6
## + secchi_openwater    1       139 369880 1707.7
## + iceon               1       132 369887 1707.7
## + energy              1        40 369979 1707.7
## + stability           1        27 369992 1707.7
## - lakeid              6     78192 448211 1737.7
## 
## Step:  AIC=1703.65
## doc_epiMax ~ lakeid + stratoff
## 
##                      Df Sum of Sq    RSS    AIC
## + anoxia_summer       1      4750 358698 1702.6
## <none>                            363448 1703.7
## + daphnia_biomass     1      2324 361124 1704.2
## + total_zoop_biomass  1       801 362648 1705.2
## + straton             1       586 362862 1705.3
## + iceon               1       542 362907 1705.3
## + secchi_all          1       495 362953 1705.3
## + energy              1       154 363294 1705.6
## + secchi_openwater    1       145 363303 1705.6
## + stability           1       117 363332 1705.6
## - stratoff            1      6571 370019 1705.8
## + stratoff:lakeid     6     13873 349576 1706.7
## - lakeid              6     63031 426479 1728.3
## 
## Step:  AIC=1702.64
## doc_epiMax ~ lakeid + stratoff + anoxia_summer
## 
##                        Df Sum of Sq    RSS    AIC
## <none>                              358698 1702.6
## + daphnia_biomass       1      2138 356560 1703.3
## - anoxia_summer         1      4750 363448 1703.7
## + total_zoop_biomass    1       758 357940 1704.2
## + secchi_all            1       522 358176 1704.3
## + straton               1       476 358223 1704.3
## + energy                1       144 358555 1704.5
## + stability             1       134 358565 1704.6
## + iceon                 1       131 358567 1704.6
## + secchi_openwater      1        83 358615 1704.6
## - stratoff              1      6892 365591 1705.0
## + anoxia_summer:lakeid  6     13688 345010 1705.7
## + stratoff:lakeid       6     12182 346516 1706.7
## - lakeid                6     62932 421630 1727.7
doc_model = lm(doc_epiMax~(iceon+straton+stratoff+energy+
                 stability+anoxia_summer+secchi_all+
                 secchi_openwater+total_zoop_biomass+daphnia_biomass)*lakeid, 
               data=n_lakes_wide, 
               na.action = na.exclude)
summary(doc_model)
## 
## Call:
## lm(formula = doc_epiMax ~ (iceon + straton + stratoff + energy + 
##     stability + anoxia_summer + secchi_all + secchi_openwater + 
##     total_zoop_biomass + daphnia_biomass) * lakeid, data = n_lakes_wide, 
##     na.action = na.exclude)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -112.408  -18.508    1.979   21.382   82.409 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                  2.311e+02  1.434e+02   1.612  0.10904   
## iceon                       -4.902e-01  3.104e-01  -1.579  0.11627   
## straton                     -1.220e-02  3.042e-01  -0.040  0.96806   
## stratoff                     9.291e-01  3.348e-01   2.775  0.00619 **
## energy                      -2.488e-01  2.152e-01  -1.156  0.24937   
## stability                   -1.471e-01  1.454e-01  -1.012  0.31304   
## anoxia_summer               -1.244e-01  1.594e-01  -0.781  0.43615   
## secchi_all                   8.906e-02  7.183e-02   1.240  0.21688   
## secchi_openwater            -2.327e-01  1.050e-01  -2.215  0.02817 * 
## total_zoop_biomass           4.952e-02  6.695e-02   0.740  0.46062   
## daphnia_biomass              9.345e-02  7.328e-02   1.275  0.20413   
## lakeid.L                     4.011e+01  3.961e+02   0.101  0.91948   
## lakeid.Q                     1.435e+02  3.762e+02   0.382  0.70331   
## lakeid.C                    -1.958e+02  3.882e+02  -0.504  0.61467   
## lakeid^4                     3.617e+02  3.847e+02   0.940  0.34847   
## lakeid^5                    -1.680e+02  3.801e+02  -0.442  0.65903   
## lakeid^6                    -1.908e+02  3.496e+02  -0.546  0.58593   
## iceon:lakeid.L              -5.231e-01  7.970e-01  -0.656  0.51260   
## iceon:lakeid.Q               7.488e-01  8.113e-01   0.923  0.35745   
## iceon:lakeid.C               8.301e-01  8.266e-01   1.004  0.31683   
## iceon:lakeid^4              -5.708e-01  7.929e-01  -0.720  0.47266   
## iceon:lakeid^5               9.560e-01  8.552e-01   1.118  0.26530   
## iceon:lakeid^6               9.133e-01  8.420e-01   1.085  0.27969   
## straton:lakeid.L             4.962e-01  8.050e-01   0.616  0.53847   
## straton:lakeid.Q            -1.749e-01  7.761e-01  -0.225  0.82200   
## straton:lakeid.C            -3.793e-01  8.146e-01  -0.466  0.64211   
## straton:lakeid^4             1.276e+00  8.270e-01   1.543  0.12482   
## straton:lakeid^5             7.029e-01  8.242e-01   0.853  0.39508   
## straton:lakeid^6            -5.570e-01  7.813e-01  -0.713  0.47689   
## stratoff:lakeid.L            4.236e-01  9.075e-01   0.467  0.64135   
## stratoff:lakeid.Q           -1.591e+00  7.816e-01  -2.036  0.04341 * 
## stratoff:lakeid.C            4.492e-01  9.173e-01   0.490  0.62500   
## stratoff:lakeid^4           -1.372e-01  9.896e-01  -0.139  0.88993   
## stratoff:lakeid^5           -5.437e-01  9.269e-01  -0.587  0.55835   
## stratoff:lakeid^6           -1.398e+00  7.704e-01  -1.814  0.07152 . 
## energy:lakeid.L             -4.274e-01  5.804e-01  -0.736  0.46264   
## energy:lakeid.Q             -4.231e-01  5.481e-01  -0.772  0.44139   
## energy:lakeid.C              1.554e-02  5.715e-01   0.027  0.97834   
## energy:lakeid^4             -4.046e-01  6.107e-01  -0.663  0.50858   
## energy:lakeid^5              2.961e-01  5.623e-01   0.527  0.59928   
## energy:lakeid^6              5.166e-01  5.398e-01   0.957  0.33997   
## stability:lakeid.L           1.717e-01  3.633e-01   0.473  0.63707   
## stability:lakeid.Q           5.557e-01  3.887e-01   1.430  0.15482   
## stability:lakeid.C          -2.151e-01  3.939e-01  -0.546  0.58572   
## stability:lakeid^4          -1.023e+00  3.235e-01  -3.162  0.00188 **
## stability:lakeid^5           6.405e-02  4.223e-01   0.152  0.87963   
## stability:lakeid^6          -3.525e-01  4.077e-01  -0.865  0.38860   
## anoxia_summer:lakeid.L      -2.248e-01  4.792e-01  -0.469  0.63960   
## anoxia_summer:lakeid.Q       6.326e-01  4.635e-01   1.365  0.17421   
## anoxia_summer:lakeid.C      -4.707e-01  4.141e-01  -1.137  0.25745   
## anoxia_summer:lakeid^4      -1.564e-01  4.457e-01  -0.351  0.72619   
## anoxia_summer:lakeid^5      -4.895e-01  3.366e-01  -1.454  0.14785   
## anoxia_summer:lakeid^6       1.068e+00  3.725e-01   2.867  0.00471 **
## secchi_all:lakeid.L         -5.570e-02  1.628e-01  -0.342  0.73267   
## secchi_all:lakeid.Q         -3.857e-01  1.760e-01  -2.192  0.02986 * 
## secchi_all:lakeid.C         -2.896e-01  1.979e-01  -1.463  0.14542   
## secchi_all:lakeid^4          1.813e-01  1.575e-01   1.151  0.25132   
## secchi_all:lakeid^5          3.066e-01  2.277e-01   1.347  0.18000   
## secchi_all:lakeid^6          4.941e-01  2.084e-01   2.371  0.01894 * 
## secchi_openwater:lakeid.L    3.880e-01  2.567e-01   1.511  0.13270   
## secchi_openwater:lakeid.Q   -6.669e-02  2.750e-01  -0.243  0.80870   
## secchi_openwater:lakeid.C    4.389e-01  2.879e-01   1.524  0.12943   
## secchi_openwater:lakeid^4    1.363e-01  2.269e-01   0.601  0.54880   
## secchi_openwater:lakeid^5   -6.341e-01  3.161e-01  -2.006  0.04656 * 
## secchi_openwater:lakeid^6   -3.994e-01  2.959e-01  -1.350  0.17896   
## total_zoop_biomass:lakeid.L -2.801e-03  1.743e-01  -0.016  0.98720   
## total_zoop_biomass:lakeid.Q -2.498e-01  1.837e-01  -1.360  0.17580   
## total_zoop_biomass:lakeid.C -4.307e-01  1.800e-01  -2.393  0.01790 * 
## total_zoop_biomass:lakeid^4  1.652e-02  1.539e-01   0.107  0.91468   
## total_zoop_biomass:lakeid^5 -1.730e-01  1.856e-01  -0.932  0.35270   
## total_zoop_biomass:lakeid^6  2.574e-01  1.833e-01   1.404  0.16220   
## daphnia_biomass:lakeid.L    -1.258e-01  2.072e-01  -0.607  0.54461   
## daphnia_biomass:lakeid.Q     1.845e-01  2.030e-01   0.909  0.36470   
## daphnia_biomass:lakeid.C     3.940e-01  1.953e-01   2.018  0.04532 * 
## daphnia_biomass:lakeid^4    -8.202e-02  1.928e-01  -0.426  0.67104   
## daphnia_biomass:lakeid^5     2.391e-01  1.826e-01   1.309  0.19233   
## daphnia_biomass:lakeid^6    -2.512e-01  1.811e-01  -1.387  0.16731   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 38.94 on 158 degrees of freedom
##   (38 observations deleted due to missingness)
## Multiple R-squared:  0.4674, Adjusted R-squared:  0.2112 
## F-statistic: 1.824 on 76 and 158 DF,  p-value: 0.0008178
n_lakes_wide$doc_predict = predict(doc_model, n_lakes_wide)

cor = n_lakes_wide %>% 
  group_by(lakeid) %>% 
  summarise(r = paste("r =", round(cor(doc_epiMax, doc_predict, use="complete.obs"), 3)))

n_lakes_wide %>% 
  ggplot(aes(x=doc_epiMax, y=doc_predict, color=as.character(lakeid))) +
  geom_point()+
  theme_bw() +
  labs(color="lakeid")+
  geom_abline(slope=1, intercept = 0) + 
  facet_wrap(~lakeid) + 
  geom_text(aes(label=r), data=cor, x=300, y=0) +
  labs(x="obs peak DOC (daynum)",  y="modeled peak DOC (daynum)")
## Warning: Removed 38 rows containing missing values (`geom_point()`).

# not good but vaguely positive relationship; use this instead of median?
missing_doc = n_lakes_wide %>% filter(is.na(doc_epiMax))

missing_doc$doc_fill = round(predict(doc_model, missing_doc))

doc_fill = missing_doc %>% 
  select(lakeid, year, doc_fill) %>% 
  rename(daynum_fill = doc_fill) %>% 
  mutate(metric = "doc_epiMax") %>% 
  filter(year < 2000)

Try to fill the missing chlorophyll values in the southern lakes

**Using dates from bad/uncalibrated chl data

filled_chl_dates = read_csv("Data/derived/chl_filled_peaks.csv") %>% 
  rename(daynum_fill = daynum) %>% 
  select(-sampledate)
## Rows: 36 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): lakeid, metric
## dbl  (2): year, daynum
## date (1): sampledate
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Create filled metric column and filled

all_fill = bind_rows(iceoff_fill, doc_fill, filled_chl_dates)
dat_filled = full_join(dat, all_fill) %>% 
  mutate(filled_flag = ifelse(is.na(daynum) & !is.na(daynum_fill), TRUE, FALSE)) %>% 
  mutate(daynum_fill = ifelse(is.na(daynum) & !is.na(daynum_fill), daynum_fill, daynum))
## Joining, by = c("lakeid", "year", "metric")
dat_filled$lakeid = factor(dat_filled$lakeid, 
                    levels = c("AL", "BM", "CB", "CR", "SP", "TB", "TR", "FI", "ME", "MO", "WI"), 
                    ordered = T)
vars = c("iceoff", "straton", "secchi_all", "secchi_openwater", "daphnia_biomass","zoopDensity",
        "total_zoop_biomass", "chlor_all", "chlor_spring", "chlor_fall", "doc_epiMax",
        "totpuf_epiMax", "totpuf_epiMin", "totpuf_hypoMax", "totpuf_hypoMin", 
        "anoxia", "anoxia_summer", "stability", "energy", "stratoff", "iceon")
dat_filled$metric = factor(dat_filled$metric,
                    levels = rev(vars),
                    ordered=T)

dat_filled %>% 
  ggplot(aes(year, metric, fill=is.na(daynum_fill))) +
  geom_tile(color="grey") +
  theme_bw() +
  facet_wrap(~lakeid, nrow=6) # + 

  # labs(fill = "Missing") + 
  # scale_fill_viridis_c() 

Additional trimming/filling

Trim to “good” years, fill FI ice data with Monona, then fill additional missing with median values for each lake/metric

df_yearsWant = dat_filled %>% 
  filter((lakeid %in% c("FI", "ME", "MO", "WI") & year %in% 1996:2018) |
           (!(lakeid %in% c("FI", "ME", "MO", "WI")) & year %in% 1982:2018))

fi_icefill = df_yearsWant %>% 
  filter(lakeid == "MO" & metric %in% c("iceoff", "iceon")) %>% 
  mutate(lakeid = "FI") %>% 
  select(-daynum, -filled_flag, -sampledate) %>% 
  rename(icefill = daynum_fill)

df_yearsWant = df_yearsWant %>% 
  full_join(fi_icefill) %>% 
  mutate(daynum_fill = ifelse(is.na(daynum_fill) & !is.na(icefill), 
                              icefill, daynum),
         filled_flag = ifelse(is.na(daynum) & !is.na(daynum_fill) & !is.na(icefill), 
                              TRUE, filled_flag))
## Joining, by = c("lakeid", "year", "metric")
df_yearsWant %>% 
  ggplot(aes(year, metric, fill=filled_flag)) +
  geom_tile(color="grey") +
  theme_bw() +
  facet_wrap(~lakeid, nrow=6)

Final fill w/ medians

all_lys = df_yearsWant %>% 
  select(lakeid, year) %>% 
  distinct()

metric = unique(df_yearsWant$metric)

all_lys_want = expand_grid(all_lys, metric)

dat_final = left_join(all_lys_want, df_yearsWant)
## Joining, by = c("lakeid", "year", "metric")
medians = dat_final %>% 
  group_by(lakeid, metric) %>% 
  summarise(median_daynum = median(daynum_fill, na.rm=T))
## `summarise()` has grouped output by 'lakeid'. You can override using the
## `.groups` argument.
dat_final = dat_final %>% 
  left_join(medians) %>% 
  mutate(daynum_fill = ifelse(is.na(daynum_fill), median_daynum, daynum_fill)) %>% 
  mutate(filled_flag = ifelse(is.na(daynum) & !is.na(daynum_fill), TRUE, filled_flag)) %>% 
  select(-icefill, -median_daynum)
## Joining, by = c("lakeid", "metric")
dat_final %>%  
  ggplot(aes(year, metric, fill=daynum_fill)) +
  geom_tile(color="grey") +
  theme_bw() +
  facet_wrap(~lakeid, nrow=6)

write_csv(dat_final, "Data/analysis_ready/final_combined_dates_filled_v2.csv")

========================================================================================= ## Old code: no longer used

chlor_all

no_dups = dat %>% group_by(lakeid, year, metric) %>% summarise(N = n()) %>% filter(N == 1)
## `summarise()` has grouped output by 'lakeid', 'year'. You can override using
## the `.groups` argument.
s_lakes_wide = left_join(no_dups, dat) %>% 
  select(-sampledate) %>% 
  pivot_wider(names_from = metric, values_from = daynum) %>% 
  filter(lakeid %in% c("FI", "ME", "MO", "WI"))
## Joining, by = c("lakeid", "year", "metric")
# stepwise
hold_data = na.omit(data.frame(s_lakes_wide %>% filter(lakeid != "FI") %>% select(-chlor_spring, -chlor_fall, -daphnia_biomass, -total_zoop_biomass)))
all = lm(chlor_all~., data=hold_data)
i_o = lm(chlor_all~1, data=hold_data)
hold = step(i_o, scope=formula(all))
## Start:  AIC=336.42
## chlor_all ~ 1
## 
##                    Df Sum of Sq    RSS    AIC
## + lakeid            1   23765.6 147231 332.44
## + totpuf_epiMin     1   11056.3 159940 335.75
## + stratoff          1    9424.6 161572 336.15
## <none>                          170996 336.42
## + secchi_openwater  1    6617.2 164379 336.84
## + straton           1    3156.5 167840 337.68
## + secchi_all        1    2841.7 168155 337.75
## + zoopDensity       1    2542.1 168454 337.82
## + anoxia_summer     1    1765.7 169231 338.01
## + totpuf_epiMax     1    1110.1 169886 338.16
## + doc_epiMax        1    1019.7 169977 338.18
## + totpuf_hypoMin    1     669.8 170327 338.26
## + year              1      81.3 170915 338.40
## + iceon             1      75.2 170921 338.40
## + energy            1      23.2 170973 338.42
## + stability         1      21.0 170975 338.42
## + totpuf_hypoMax    1       3.0 170993 338.42
## + iceoff            1       0.7 170996 338.42
## 
## Step:  AIC=332.44
## chlor_all ~ lakeid
## 
##                    Df Sum of Sq    RSS    AIC
## + stratoff          1    9424.6 137806 331.79
## + straton           1    7685.7 139545 332.29
## <none>                          147231 332.44
## + totpuf_epiMin     1    7104.0 140127 332.46
## + zoopDensity       1    4245.0 142986 333.26
## + anoxia_summer     1    3720.5 143510 333.41
## + secchi_all        1    2301.3 144929 333.80
## + iceon             1    2087.2 145144 333.86
## + secchi_openwater  1    1484.8 145746 334.03
## + doc_epiMax        1     837.4 146393 334.21
## + energy            1     572.3 146658 334.28
## + totpuf_hypoMax    1     391.2 146840 334.33
## + iceoff            1     380.4 146850 334.33
## + stability         1     253.0 146978 334.37
## + totpuf_epiMax     1     204.1 147027 334.38
## + year              1      81.3 147149 334.41
## + totpuf_hypoMin    1      38.6 147192 334.42
## - lakeid            1   23765.6 170996 336.42
## 
## Step:  AIC=331.79
## chlor_all ~ lakeid + stratoff
## 
##                    Df Sum of Sq    RSS    AIC
## <none>                          137806 331.79
## + straton           1    5781.8 132024 332.07
## - stratoff          1    9424.6 147231 332.44
## + totpuf_epiMin     1    4535.5 133271 332.45
## + anoxia_summer     1    3362.7 134443 332.80
## + stability         1    2242.3 135564 333.13
## + zoopDensity       1    2237.4 135569 333.13
## + secchi_openwater  1    1590.3 136216 333.32
## + secchi_all        1    1342.3 136464 333.40
## + totpuf_epiMax     1     966.9 136839 333.51
## + doc_epiMax        1     875.3 136931 333.53
## + iceon             1     732.2 137074 333.58
## + energy            1     698.6 137108 333.59
## + iceoff            1     320.6 137486 333.70
## + year              1     109.6 137697 333.76
## + totpuf_hypoMin    1      99.0 137707 333.76
## + totpuf_hypoMax    1      93.2 137713 333.76
## - lakeid            1   23765.6 161572 336.15
chlAll_model_MEMOWI = lm(chlor_all~anoxia_summer + lakeid + iceon + stability + straton + stratoff, 
               data=s_lakes_wide %>% filter(lakeid != "FI"))
summary(chlAll_model_MEMOWI)
## 
## Call:
## lm(formula = chlor_all ~ anoxia_summer + lakeid + iceon + stability + 
##     straton + stratoff, data = s_lakes_wide %>% filter(lakeid != 
##     "FI"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -115.40  -44.11    0.33   41.41  122.46 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   -149.4119   271.2274  -0.551   0.5841  
## anoxia_summer    0.6454     0.3620   1.783   0.0806 .
## lakeid.L       -32.6015    26.4849  -1.231   0.2240  
## lakeid.Q       -32.9677    19.6738  -1.676   0.0999 .
## iceon            0.9296     0.6700   1.388   0.1713  
## stability       -0.5412     0.2784  -1.944   0.0574 .
## straton          0.6513     0.3881   1.678   0.0995 .
## stratoff        -0.4440     0.5099  -0.871   0.3880  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 63.02 on 51 degrees of freedom
##   (16 observations deleted due to missingness)
## Multiple R-squared:  0.2751, Adjusted R-squared:  0.1756 
## F-statistic: 2.765 on 7 and 51 DF,  p-value: 0.01627
plot(s_lakes_wide %>% filter(lakeid != "FI") %>% pull(chlor_all), 
     predict(chlAll_model_MEMOWI, s_lakes_wide %>% filter(lakeid != "FI")),
     col=s_lakes_wide$lakeid, pch=16, xlab="observed", ylab="predicted")

# using to fill for now
missing_chlAll_MEMOWI = s_lakes_wide %>% filter(is.na(chlor_all) & lakeid != "FI")
missing_chlAll_MEMOWI$chl_all_fill = round(predict(chlAll_model_MEMOWI, missing_chlAll_MEMOWI))

chlAll_model_FI = lm(chlor_all~(stratoff+energy+
                 stability+anoxia_summer+secchi_all+
                 secchi_openwater), 
               data=s_lakes_wide %>% filter(lakeid == "FI"))
summary(chlAll_model_FI)
## 
## Call:
## lm(formula = chlor_all ~ (stratoff + energy + stability + anoxia_summer + 
##     secchi_all + secchi_openwater), data = s_lakes_wide %>% filter(lakeid == 
##     "FI"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -83.354 -31.601   1.305  37.879 109.887 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      890.13470  511.21808   1.741   0.1072  
## stratoff           0.43672    1.22603   0.356   0.7279  
## energy            -3.12342    1.40241  -2.227   0.0458 *
## stability         -0.23441    0.77265  -0.303   0.7668  
## anoxia_summer     -0.24990    0.65794  -0.380   0.7107  
## secchi_all        -0.04527    0.22865  -0.198   0.8464  
## secchi_openwater  -0.21407    0.26752  -0.800   0.4392  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 63.82 on 12 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.329,  Adjusted R-squared:  -0.006451 
## F-statistic: 0.9808 on 6 and 12 DF,  p-value: 0.4788
plot(s_lakes_wide %>% filter(lakeid == "FI") %>% pull(chlor_all), 
     predict(chlAll_model_FI, s_lakes_wide %>% filter(lakeid == "FI")),
     col=s_lakes_wide$lakeid, pch=16)

missing_chlAll_FI = s_lakes_wide %>% filter(is.na(chlor_all) & lakeid == "FI")
missing_chlAll_FI$chl_all_fill = round(predict(chlAll_model_FI, missing_chlAll_FI))
# gives negative value; DONT fill FI

chlAll_fill = missing_chlAll_MEMOWI %>% 
  select(lakeid, year, chl_all_fill) %>% 
  rename(daynum_fill = chl_all_fill) %>% 
  mutate(metric = "chlor_all")